import requests
from bs4 import BeautifulSoup
import time

# Comments listing for Douban movie subject 5968334; the paging query string
# (?start=...&limit=...) is appended per request by the crawl loop.
base_url = 'https://movie.douban.com/subject/5968334/comments'

# HTTP headers sent with every page request: a desktop Chrome-on-Windows
# User-Agent string, split across lines only for readability.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/126.0.0.0 Safari/537.36'
    ),
}

# Crawl up to 1500 pages of 20 comments each and append every comment's rating
# and text to output.txt (UTF-8, append mode, two lines per comment).

# Seconds to wait for the server before giving up on a request; without a
# timeout, requests.get can block indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 10
# Pause between consecutive page requests so the pages are not fetched back
# to back.
REQUEST_DELAY_SECONDS = 1

for i in range(1500):
    if i:
        time.sleep(REQUEST_DELAY_SECONDS)

    page_url = f'{base_url}?start={i*20}&limit=20'

    try:
        response = requests.get(
            page_url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS
        )
    except requests.RequestException as exc:
        # A transport-level failure (DNS, connection, timeout) ends the run;
        # everything written so far is already on disk.
        print(f'Request for {page_url} failed: {exc}; stopping.')
        break

    if response.status_code != 200:
        # Non-200 pages are skipped, as before, but no longer silently.
        print(f'Skipping {page_url}: HTTP {response.status_code}')
        continue

    soup = BeautifulSoup(response.text, 'html.parser')
    comment_list = soup.find_all('div', class_='comment')
    if not comment_list:
        # A successful page without any comment blocks has nothing to record;
        # later offsets are assumed to be empty as well, so stop requesting.
        break

    lines = []
    for comment in comment_list:
        content_tag = comment.find('p', class_='comment-content')
        if content_tag is None:
            # Matched <div class="comment"> has no comment text node; skip it
            # instead of crashing on None.text.
            continue
        content = content_tag.text.strip()

        rating_tag = comment.find('span', class_='rating')
        # Fall back to the "no rating" label when the span is absent or
        # carries no title attribute.
        if rating_tag is not None:
            rating = rating_tag.get('title', '无评分')
        else:
            rating = '无评分'

        # Newlines are added explicitly because writelines() does not add them.
        lines.append(f'评分: {rating}\n')
        lines.append(f'评论内容: {content}\n')

    if lines:
        # Open the output once per page rather than once per comment.
        with open('output.txt', 'a', encoding='utf-8') as f:
            f.writelines(lines)